# dependencies
library(tidyverse)
library(scales)
library(stringr)
library(effectsize)
library(janitor)
library(knitr)
library(kableExtra)

Single items attitude scales - AIID

load("../data/aiid/processed/AIID_subset_confirmatory.RData")

data_trimmed <- AIID_subset_confirmatory %>%
  #filter(exclude_iat_stricter == FALSE) %>%
  filter(english_fluency %in% c("English fluent - speak/read it regularly", "English is my primary language")) |>
  dplyr::select(user_id, 
                domain, 
                age, 
                sex, 
                english_fluency, 
                exclude_iat_stricter, 
                block_order, 
                iat_type,
                D,
                # self-report attitudes data
                prefer, 
                others_prefer, 
                actual_x, actual_y, 
                actual_diff,
                gut_x, gut_y,
                # individual differences scales data
                individual_differences_measure, 
                individual_differences_sum_score)

Correlations

Cors

dat_subset <- data_trimmed |>
  filter(domain %in% c("Poor People - Rich People",
                       "Conservatives - Liberals",
                       "Burger King - McDonald's",
                       "Protestants - Catholics"))

dat_r <- dat_subset |>
  group_by(domain) |>
  summarise(n = n(), 
            mean_prefer = mean(prefer, na.rm = TRUE),
            mean_actual_diff = mean(actual_diff, na.rm = TRUE),
            mean_actual_x = mean(actual_x, na.rm = TRUE),
            mean_actual_y = mean(actual_y, na.rm = TRUE),
            sd_prefer = sd(prefer, na.rm = TRUE),
            sd_actual_diff = sd(actual_diff, na.rm = TRUE),
            sd_actual_x = sd(actual_x, na.rm = TRUE),
            sd_actual_y = sd(actual_y, na.rm = TRUE),
            r_prefer_actual = broom::tidy(cor.test(actual_diff, prefer, use = "pairwise.complete.obs")),
            r_actuals = broom::tidy(cor.test(actual_x, actual_y, use = "pairwise.complete.obs"))) |>
            #r_gut = cor(gut_x, gut_y, use = "pairwise.complete.obs"),
            #r_selfother = cor(prefer, others_prefer, use = "pairwise.complete.obs")) |>
  unnest(r_prefer_actual, names_sep = "_") |>
  unnest(r_actuals, names_sep = "_") |>
  mutate_if(is.numeric, janitor::round_half_up, digits = 2) |>
  select(domain, n, 
         mean_prefer, mean_actual_diff, mean_actual_x, mean_actual_y, 
         sd_prefer, sd_actual_diff, sd_actual_x, sd_actual_y, 
         r_prefer_actual = r_prefer_actual_estimate, r_prefer_actual_lower = r_prefer_actual_conf.low, r_prefer_actual_upper = r_prefer_actual_conf.high, 
         r_actuals = r_actuals_estimate, r_actuals_lower = r_actuals_conf.low, r_actuals_upper = r_actuals_conf.high)

# dat_r |>
#   kable() |>
#   kable_classic(full_width = FALSE)

dat_r |>
  select(domain, n, 
         r_prefer_actual, r_actuals) |>
  kable() |>
  kable_classic(full_width = FALSE)
domain n r_prefer_actual r_actuals
Burger King - McDonald’s 1758 0.69 0.50
Conservatives - Liberals 1803 0.79 -0.59
Poor People - Rich People 1891 0.53 0.06
Protestants - Catholics 1344 0.64 0.56
  • “Actuals” are two separate ratings for how people ‘actually’ feel about two different concepts, e.g., “Rate your actual feelings toward the topics below: Actual feelings toward burger king [1-10] versus mcdonalds [1-10]”. Depending on whether you thing the two concepts are opposition or not, they should be negative vs positively correlated. The fact that its a single item measure with lots of error probably reduces observed correlations, but single item scales are common.
    • Liking burger king ~ liking mcdonalds: r = .50
    • Liking poor people ~ liking rich people: r = .06
    • Liking protestants ~ liking Catholics: r = .56
    • Liking conservatives ~ liking liberals: r = -.59
  • “Prefer” is a single item relative preference between the two concepts, e.g., “I prefer burger king or mcdonalds [-3 to +3]. Correlating prefer vs the difference score between the two actual items emphasizes that these should be related concepts (both attitudes towards these domains) but merely changing the dimensionality of the questions (absolute vs relative evaluation) can change the results
    • Difference in liking ~ preference for burger king vs liking mcdonalds: r = .69
    • Difference in liking ~ preference for poor people vs rich people: r = .79
    • Difference in liking ~ preference for protestants vs Catholics: r = .53
    • Difference in liking ~ preference for conservatives vs liberals: r = .64

plot actual feelings towards x vs y items

mean_sd_points <- dat_subset %>%
  group_by(domain) %>%
  summarise(
    mean_x = mean(actual_x, na.rm = TRUE),
    mean_y = mean(actual_y, na.rm = TRUE),
    sd_x = sd(actual_x, na.rm = TRUE),
    sd_y = sd(actual_y, na.rm = TRUE)
  )

ggplot(dat_subset, aes(actual_x, actual_y)) +
  geom_jitter(alpha = 0.4) +
  # Red mean dot
  geom_point(
    data = mean_sd_points,
    aes(x = mean_x, y = mean_y),
    color = "red",
    size = 3,
    inherit.aes = FALSE
  ) +
  # Optional: add SD error bars (crosshairs)
  geom_errorbar(
    data = mean_sd_points,
    aes(x = mean_x, ymin = mean_y - sd_y, ymax = mean_y + sd_y),
    color = "red",
    width = 0,
    inherit.aes = FALSE
  ) +
  geom_errorbarh(
    data = mean_sd_points,
    aes(y = mean_y, xmin = mean_x - sd_x, xmax = mean_x + sd_x),
    color = "red",
    height = 0,
    inherit.aes = FALSE
  ) +
  facet_wrap(~ domain) +
  scale_x_continuous(breaks = scales::breaks_pretty(10)) +
  scale_y_continuous(breaks = scales::breaks_pretty(10)) +
  coord_fixed() +
  coord_cartesian(xlim = c(0.5, 10.5), ylim = c(0.5, 10.5))

plot prefer vs actual items

mean_sd_points <- dat_subset %>%
  group_by(domain) %>%
  summarise(
    mean_y = mean(actual_diff, na.rm = TRUE),
    mean_x = mean(prefer, na.rm = TRUE),
    sd_y = sd(actual_diff, na.rm = TRUE),
    sd_x = sd(prefer, na.rm = TRUE)
  )

ggplot(dat_subset, aes(actual_diff, prefer)) +
  geom_jitter(alpha = 0.4) +
  # Red mean dot
  geom_point(
    data = mean_sd_points,
    aes(x = mean_x, y = mean_y),
    color = "red",
    size = 3,
    inherit.aes = FALSE
  ) +
  # Optional: add SD error bars (crosshairs)
  geom_errorbar(
    data = mean_sd_points,
    aes(x = mean_x, ymin = mean_y - sd_y, ymax = mean_y + sd_y),
    color = "red",
    width = 0,
    inherit.aes = FALSE
  ) +
  geom_errorbarh(
    data = mean_sd_points,
    aes(y = mean_y, xmin = mean_x - sd_x, xmax = mean_x + sd_x),
    color = "red",
    height = 0,
    inherit.aes = FALSE
  ) +
  facet_wrap(~ domain) +
  scale_x_continuous(breaks = scales::breaks_pretty(10)) +
  scale_y_continuous(breaks = scales::breaks_pretty(7)) +
  coord_fixed() +
  coord_cartesian(xlim = c(-10.5, 10.5), ylim = c(-3.5, +3.5))

Cohen’s d

data_cohensd <- data_trimmed |>
  group_by(domain) |>
  summarize(n_x = sum(!is.na(actual_x)), 
            n_y = sum(!is.na(actual_y)), 
            mean_x = mean(actual_x, na.rm = TRUE), 
            mean_y = mean(actual_y, na.rm = TRUE), 
            sd_x = sd(actual_x, na.rm = TRUE), 
            sd_y = sd(actual_y, na.rm = TRUE), 
            d = cohens_d(actual_x, actual_y, pooled = TRUE)) |>
  unnest(d) |>
  # negative scores refer to preference for term on left and vice versa
  mutate(Cohens_d = Cohens_d*-1) |> 
  rename(d = Cohens_d, ci_lower = CI_high, ci_upper = CI_low) |>
  select(-CI) |>
  mutate_if(is.numeric, round_half_up, digits = 2) |>
  drop_na()

ggplot(data_cohensd, aes(mean_x, sd_x)) +
  geom_point()

library(tides)

data_cohensd |>
  pivot_longer(
    cols = c(n_x, n_y, mean_x, mean_y, sd_x, sd_y),
    names_to = c(".value", "category"),
    names_sep = "_"
  ) |>
  mutate(min = 1, 
         max = 10, 
         n_items = 1,
         digits = 2,
         calculate_min_sd = TRUE,
         method = "approximate") |>
  tides_df(mean = mean, 
           sd = sd, 
           n = n,
           min = min,
           max = max,
           n_items = n_items,
           digits = digits,
           calculate_min_sd = calculate_min_sd,
           method = method) |>
  plot_tides() +
  geom_hline(yintercept = 4.5*.666, linetype = "dashed", color = "darkred")

data_cohensd |>
  arrange(d) |>
  kable() |>
  kable_classic(full_width = FALSE)
domain n_x n_y mean_x mean_y sd_x sd_y d ci_upper ci_lower
Denzel Washington - Tom Cruise 1583 1582 7.64 5.10 1.83 2.47 -1.17 1.09 1.25
Gun Control - Gun Rights 1169 1166 7.43 4.58 2.63 2.82 -1.04 0.96 1.13
Private - Public 922 918 7.78 5.92 1.80 2.08 -0.95 0.86 1.05
Evolution - Creationism 1354 1350 7.50 4.74 2.80 3.29 -0.90 0.82 0.98
Jazz - Teen Pop 1264 1262 6.66 5.07 2.23 2.34 -0.69 0.61 0.77
Lawyers - Politicians 919 922 5.72 4.53 2.00 1.91 -0.61 0.51 0.70
Mother Teresa - Princess Diana 1071 1073 8.26 7.07 2.02 2.03 -0.58 0.50 0.67
Dogs - Cats 1514 1515 8.04 6.70 2.09 2.75 -0.55 0.47 0.62
Reason - Emotions 1290 1289 7.94 6.90 1.71 2.04 -0.55 0.47 0.63
Night - Morning 1085 1084 7.34 6.24 1.87 2.44 -0.51 0.42 0.59
Wrinkles - Plastic Surgery 965 962 5.27 4.21 2.10 2.23 -0.49 0.40 0.58
Pants - Skirts 1342 1340 7.99 7.00 1.87 2.27 -0.48 0.40 0.55
Helpers - Leaders 1217 1221 7.58 6.94 1.83 2.02 -0.33 0.25 0.41
Protein - Carbohydrates 922 923 7.52 6.91 1.88 2.10 -0.31 0.22 0.40
Relaxing - Exercising 1269 1268 7.76 7.15 1.99 2.18 -0.29 0.21 0.37
Canadian - American 1341 1340 7.54 7.05 1.82 2.18 -0.24 0.17 0.32
Realism - Idealism 896 893 7.20 6.76 1.86 2.01 -0.23 0.14 0.32
Bill Clinton - Hillary Clinton 1062 1062 6.63 6.11 2.33 2.40 -0.22 0.13 0.30
Tall People - Short People 1411 1417 7.22 6.80 1.85 1.90 -0.22 0.15 0.30
Redsox - Yankees 1064 1060 5.74 5.31 2.01 2.24 -0.20 0.12 0.29
State - Church 1064 1061 5.69 5.22 1.95 2.69 -0.20 0.11 0.28
50 Cent - Britney Spears 1180 1180 4.62 4.25 2.00 2.08 -0.18 0.10 0.26
Foreign Places - American Places 1424 1422 7.94 7.61 1.73 1.88 -0.18 0.11 0.26
Protestants - Catholics 952 955 6.20 5.83 2.03 2.18 -0.18 0.09 0.27
David Letterman - Jay Leno 1043 1043 6.14 5.84 2.03 2.08 -0.15 0.06 0.23
Jews - Christians 1443 1447 6.94 6.63 1.99 2.29 -0.15 0.07 0.22
Poor People - Rich People 1441 1439 5.86 5.59 1.79 1.87 -0.15 0.07 0.22
Urban - Rural 1040 1039 6.65 6.36 2.03 2.14 -0.14 0.05 0.23
Burger King - McDonald’s 1335 1335 4.88 4.71 2.36 2.48 -0.07 -0.01 0.15
Effort - Talent 981 980 8.23 8.13 1.87 1.79 -0.06 -0.03 0.14
Lord of the Rings - Harry Potter 988 992 7.15 7.08 2.51 2.54 -0.03 -0.06 0.12
Old People - Young People 1436 1442 7.11 7.09 1.85 1.77 -0.01 -0.06 0.09
Rebellious - Conforming 1330 1329 5.48 5.45 2.04 2.17 -0.01 -0.06 0.09
Friends - Family 1079 1076 7.94 7.94 1.70 2.06 0.00 -0.09 0.08
Organized Labor - Management 1067 1066 6.19 6.20 2.22 2.02 0.01 -0.09 0.08
Microsoft - Apple 1187 1186 6.48 6.53 2.26 2.14 0.03 -0.11 0.06
New York - California 1309 1308 6.85 6.91 2.06 1.95 0.03 -0.11 0.05
West Coast - East Coast 930 929 7.11 7.18 1.97 1.98 0.04 -0.13 0.06
Asians - Whites 1584 1587 7.22 7.41 1.90 1.88 0.10 -0.17 -0.03
African Americans - European Americans 1712 1714 7.09 7.30 1.89 1.83 0.11 -0.18 -0.05
Meg Ryan - Julia Roberts 1355 1354 6.75 7.08 2.00 2.05 0.16 -0.24 -0.09
Artists - Musicians 1029 1031 7.72 8.02 1.87 1.73 0.17 -0.25 -0.08
Difficult - Simple 1234 1229 6.37 6.75 2.06 2.22 0.18 -0.25 -0.10
Coffee - Tea 1332 1337 6.77 7.27 2.80 2.34 0.19 -0.27 -0.12
Mountains - Ocean 1101 1095 7.86 8.24 1.89 1.86 0.20 -0.29 -0.12
Japan - United States 1393 1391 6.72 7.14 1.78 2.17 0.21 -0.29 -0.14
Stable - Flexible 1194 1198 7.22 7.65 1.85 1.78 0.24 -0.32 -0.16
Atheism - Religion 1440 1437 5.18 5.88 2.74 2.67 0.26 -0.33 -0.19
Dramas - Comedies 1004 1003 7.16 7.71 1.90 1.93 0.29 -0.38 -0.20
Pepsi - Coke 1403 1402 5.67 6.44 2.55 2.59 0.30 -0.37 -0.23
Southerners - Northerners 1193 1193 6.43 7.05 2.01 1.80 0.32 -0.40 -0.24
Strong - Sensitive 968 966 6.54 7.20 1.95 1.89 0.34 -0.43 -0.25
Cold - Hot 1279 1280 5.62 6.31 1.97 1.83 0.36 -0.44 -0.28
Skeptical - Trusting 964 969 6.13 6.92 2.16 2.21 0.36 -0.45 -0.27
Numbers - Letters 1342 1344 6.62 7.45 2.25 2.02 0.39 -0.46 -0.31
Rich People - Beautiful People 1112 1110 5.75 6.51 1.86 1.81 0.42 -0.50 -0.33
Receiving - Giving 941 943 7.52 8.33 1.98 1.72 0.43 -0.52 -0.34
Tax Reductions - Social Programs 1126 1127 6.35 7.41 2.54 2.25 0.44 -0.53 -0.36
Muslims - Jews 1392 1392 6.12 7.06 2.18 2.01 0.45 -0.52 -0.37
Meat - Vegetables 1462 1463 6.68 7.84 2.61 2.11 0.49 -0.56 -0.42
Traditional Values - Feminism 1343 1345 5.56 6.80 2.49 2.27 0.52 -0.60 -0.44
Career - Family 1346 1349 6.89 8.02 2.05 1.89 0.57 -0.65 -0.50
Team - Individual 975 974 6.61 7.80 2.19 1.78 0.59 -0.69 -0.50
Past - Future 1332 1331 6.27 7.49 2.14 1.92 0.60 -0.68 -0.53
Gay People - Straight People 1688 1686 7.09 8.35 2.37 1.76 0.61 -0.67 -0.54
Single - Married 1410 1406 5.94 7.28 2.22 2.21 0.61 -0.68 -0.53
Hiphop - Classical 1398 1396 5.53 7.02 2.55 2.27 0.62 -0.70 -0.54
Jocks - Nerds 1334 1330 5.62 6.84 2.07 1.85 0.62 -0.70 -0.54
Prolife - Prochoice 1299 1294 5.22 7.12 3.00 2.92 0.64 -0.72 -0.57
Technology - Nature 1033 1033 7.41 8.52 1.85 1.59 0.64 -0.73 -0.55
Kobe - Shaq 957 956 5.05 6.39 2.06 1.93 0.67 -0.77 -0.58
Tradition - Progress 956 955 6.53 7.82 2.06 1.75 0.67 -0.77 -0.58
Athletic People - Intelligent People 1082 1079 7.22 8.46 1.97 1.53 0.70 -0.79 -0.61
Security - Freedom 1252 1249 7.25 8.59 2.11 1.62 0.71 -0.79 -0.63
Fat People - Thin People 1505 1506 5.48 6.85 1.94 1.72 0.75 -0.82 -0.68
Capital Punishment - Imprisonment 1304 1303 4.30 6.37 2.99 2.46 0.76 -0.84 -0.68
Money - Love 1199 1203 7.15 8.60 2.07 1.73 0.76 -0.85 -0.68
Solitude - Companionship 1033 1032 6.20 7.73 2.18 1.78 0.77 -0.86 -0.68
Winter - Summer 1425 1425 5.66 7.54 2.35 2.13 0.84 -0.92 -0.76
Briefs - Boxers 1010 1011 5.32 7.28 2.40 2.16 0.86 -0.95 -0.76
Drinking - Abstaining 1300 1299 4.72 6.91 2.24 2.25 0.97 -1.05 -0.89
George Bush - John Kerry 1382 1381 3.32 5.80 2.66 2.26 1.01 -1.09 -0.93
Conservatives - Liberals 1186 1188 4.50 6.81 2.31 2.24 1.02 -1.10 -0.93
Manufactured - Natural 894 897 6.22 8.06 1.89 1.71 1.02 -1.12 -0.93
Republicans - Democrats 1249 1249 4.28 6.56 2.34 2.11 1.02 -1.11 -0.94
Speed - Accuracy 1376 1379 6.79 8.57 1.87 1.57 1.04 -1.12 -0.96
Innocence - Wisdom 1049 1046 6.92 8.81 2.10 1.43 1.06 -1.15 -0.96
Corporations - Nonprofits 970 967 5.37 7.61 2.19 1.89 1.09 -1.19 -1.00
Television - Books 1502 1502 6.32 8.55 2.25 1.84 1.09 -1.16 -1.01
Avoiding - Approaching 1123 1120 4.59 7.13 2.12 1.90 1.26 -1.35 -1.17
Astrology - Science 997 996 4.85 8.06 2.58 2.01 1.39 -1.49 -1.29
National Defense - Education 1336 1334 5.93 8.91 2.47 1.60 1.43 -1.51 -1.34
Punishment - Forgiveness 985 989 5.07 8.05 2.32 1.80 1.44 -1.54 -1.34
Determinism - Free will 943 943 4.75 8.21 2.45 1.81 1.61 -1.71 -1.50
Chaos - Order 1055 1053 4.04 7.56 2.17 1.83 1.75 -1.85 -1.65
data_cohensd |>
  summarise(
    percentile = c(1, 5, 10, 25, 50, 75, 90, 95, 99) / 100,
    d = map_dbl(percentile, ~ quantile(d, probs = .x, na.rm = TRUE)),
    .groups = "drop"
  ) |>
  mutate(percentile = percentile * 100,
         d = round_half_up(d, 2)) |>
  kable() |>
  kable_classic(full_width = FALSE)
percentile d
1 -1.05
5 -0.63
10 -0.50
25 -0.17
50 0.26
75 0.67
90 1.03
95 1.30
99 1.62

Single items from indiviudal differences scales - AIID

Largest correlations

dat_subset <- AIID_subset_confirmatory |>
  filter(english_fluency %in% c("English fluent - speak/read it regularly", "English is my primary language")) |>
  filter(complete_individual_differences_data == TRUE) |>
  select(bfi_o1, bfi_o5, bfi_e1, bfi_e5, bfi_n1, bfi_n4) |>
  select(starts_with("bfi", ignore.case = FALSE))

dat_subset |>
  summarize(n_first_half = sum(!is.na(bfi_e1)),
            n_second_half = sum(!is.na(bfi_o1))) |>
  kable() |>
  kable_classic(full_width = FALSE)
n_first_half n_second_half
5191 5221
mat <- dat_subset |>
  cor(use = "pairwise.complete.obs") |>
  janitor::round_half_up(2)

mat |>
  kable() |>
  kable_classic(full_width = FALSE)
bfi_o1 bfi_o5 bfi_e1 bfi_e5 bfi_n1 bfi_n4
bfi_o1 1.00 0.62 NA NA NA NA
bfi_o5 0.62 1.00 NA NA NA NA
bfi_e1 NA NA 1.00 0.61 -0.14 0.03
bfi_e5 NA NA 0.61 1.00 -0.20 -0.09
bfi_n1 NA NA -0.14 -0.20 1.00 0.40
bfi_n4 NA NA 0.03 -0.09 0.40 1.00
  • bfi_o1 “I see myself as a person who… Is original, comes up with new ideas” ~ bfi_o5 “I see myself as a person who… Is inventive” r = .62
  • bfi_e1 “I see myself as a person who… Is talkative” ~ bfie5 “I see myself as a person who… Tends to be quiet” r = -.61
  • bfi_n1 “I see myself as a person who… Gets nervous easily” ~ bfi_n4 “I see myself as a person who… Worries a lot” r = .40

Indiviudal differences scales - Bainbridge

  • N = 388
  • Worry ~ neuroticism, r = .85
  • Verbal aggression ~ agreeableness, r = -.61
  • Openness ~ need for cognition, r = .60

Correlation between weight and height in humans

in US adults, 2008-2012. NHANES CDC data.

library(NHANES)
data(NHANES)

dat <- NHANES |>
  filter(Age >= 18) |>
  filter(!is.na(Height) & !is.na(Weight)) 

ggplot(dat, aes(Weight, Height)) +
  geom_point(alpha = 0.1)

ggplot(dat, aes(Weight, Height)) +
  geom_point(alpha = 0.1) +
  facet_wrap(~ Gender)

dat |>
  summarize(n = n(),
            cor = broom::tidy(cor.test(Weight, Height, use = "pairwise.complete.obs"))) |>
  unnest(cor) |>
  select(n, r = estimate, ci_lower = conf.low, ci_upper = conf.high) |>
  mutate_if(is.numeric, janitor::round_half_up, digits = 2) |>
  kable() |>
  kable_classic(full_width = FALSE)
n r ci_lower ci_upper
7414 0.45 0.43 0.47
dat |>
  group_by(Gender) |>
  summarize(n = n(),
            cor = broom::tidy(cor.test(Weight, Height, use = "pairwise.complete.obs"))) |>
  unnest(cor) |>
  select(n, r = estimate, ci_lower = conf.low, ci_upper = conf.high) |>
  mutate_if(is.numeric, janitor::round_half_up, digits = 2) |>
  kable() |>
  kable_classic(full_width = FALSE)
n r ci_lower ci_upper
3763 0.28 0.25 0.31
3651 0.39 0.36 0.42

Difference between average height of adult men vs women

dat <- NHANES |>
  filter(Age >= 18) |>
  filter(!is.na(Gender) & !is.na(Height) & !is.na(Weight)) 

dat |>
  summarize(n = n()) |>
  kable() |>
  kable_classic(full_width = FALSE)
n
7414

Weight:

effsize::cohen.d(Weight ~ Gender, data = dat)
## 
## Cohen's d
## 
## d estimate: -0.6685073 (medium)
## 95 percent confidence interval:
##      lower      upper 
## -0.7152995 -0.6217151

Height:

effsize::cohen.d(Height ~ Gender, data = dat)
## 
## Cohen's d
## 
## d estimate: -1.869864 (large)
## 95 percent confidence interval:
##     lower     upper 
## -1.924451 -1.815276